{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "509d5a1e-10f2-4294-a104-9fdb85a29b0c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: Class path contains multiple SLF4J bindings.\n",
      "SLF4J: Found binding in [jar:file:/root/spark-3.3.1-bin-hadoop2/jars/log4j-slf4j-impl-2.17.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n",
      "SLF4J: Found binding in [jar:file:/root/hadoop-2.7.7/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n",
      "SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.\n",
      "SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]\n",
      "24/04/03 22:04:56 WARN Utils: Your hostname, vsr542 resolves to a loopback address: 127.0.1.1; using 10.0.2.142 instead (on interface eno1)\n",
      "24/04/03 22:04:56 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "24/04/03 22:04:57 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
      "24/04/03 22:05:00 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.\n"
     ]
    }
   ],
   "source": [
    "import findspark\n",
    "findspark.init()\n",
    "\n",
    "from pyspark import SparkConf, SparkContext\n",
    "nativesql_jars = \"/path/to/gluten-XXXX.jar\"\n",
    "conf = SparkConf().setAppName(\"PySpark Gluten\").setMaster(\"yarn\")\n",
    "conf.set(\"spark.executor.instances\", \"2\")\n",
    "conf.set(\"spark.executor.memory\", \"6g\")\n",
    "conf.set(\"spark.executor.cores\", \"2\")\n",
    "conf.set(\"spark.driver.memory\", \"2g\")\n",
    "conf.set(\"spark.memory.offHeap.enabled\", \"true\")\n",
    "conf.set(\"spark.memory.offHeap.size\", \"2g\")\n",
    "conf.set(\"spark.executor.memoryOverhead\", \"384M\")\n",
    "conf.set(\"spark.driver.extraClassPath\", nativesql_jars)\n",
    "conf.set(\"spark.executor.extraClassPath\", nativesql_jars)\n",
    "conf.set(\"spark.plugins\", \"org.apache.gluten.GlutenPlugin\")\n",
    "conf.set(\"spark.gluten.loadLibFromJar\", \"false\")\n",
    "conf.set(\"spark.shuffle.manager\", \"org.apache.spark.shuffle.sort.ColumnarShuffleManager\")\n",
    "sc = SparkContext(conf=conf)\n",
    "from pyspark.sql import SparkSession\n",
    "spark_session = SparkSession(sc)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6c4ec66a-3b8c-4a65-b948-8420b492333d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "== Physical Plan ==\n",
      "VeloxColumnarToRowExec\n",
      "+- ^(1) FilterExecTransformer (isnotnull(category#1) AND (category#1 = Cellphone))\n",
      "   +- ^(1) InputIteratorTransformer[product#0, category#1, revenue#2L]\n",
      "      +- ^(1) InputAdapter\n",
      "         +- ^(1) RowToVeloxColumnar\n",
      "            +- *(1) Scan ExistingRDD[product#0,category#1,revenue#2L]\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df = spark_session.createDataFrame(\n",
    "    [\n",
    "        (\"Normal\", \"Cellphone\", 6000),\n",
    "        (\"Normal\", \"Tablet\", 1500),\n",
    "        (\"Mini\", \"Tablet\", 5500),\n",
    "        (\"Mini\", \"Cellphone\", 5000),\n",
    "        (\"Foldable\", \"Cellphone\", 6500),\n",
    "        (\"Foldable\", \"Tablet\", 2500),\n",
    "        (\"Pro\", \"Cellphone\", 3000),\n",
    "        (\"Pro\", \"Tablet\", 4000),\n",
    "        (\"Pro Max\", \"Cellphone\", 4500)\n",
    "    ],\n",
    "    [\"product\", \"category\", \"revenue\"]\n",
    ")\n",
    "df.filter(\"category = 'Cellphone'\").explain()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  },
  "nbTranslate": {
   "displayLangs": [
    "*"
   ],
   "hotkey": "alt-t",
   "langInMainMenu": true,
   "sourceLang": "en",
   "targetLang": "fr",
   "useGoogleTranslate": true
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
